{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Upgrading our MNIST Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "I:0 Test-Acc:0.394 Train-Acc:0.156\n",
      "I:10 Test-Acc:0.6867 Train-Acc:0.723\n",
      "I:20 Test-Acc:0.7025 Train-Acc:0.732\n",
      "I:30 Test-Acc:0.734 Train-Acc:0.763\n",
      "I:40 Test-Acc:0.7663 Train-Acc:0.794\n",
      "I:50 Test-Acc:0.7913 Train-Acc:0.819\n",
      "I:60 Test-Acc:0.8102 Train-Acc:0.849\n",
      "I:70 Test-Acc:0.8228 Train-Acc:0.864\n",
      "I:80 Test-Acc:0.831 Train-Acc:0.867\n",
      "I:90 Test-Acc:0.8364 Train-Acc:0.885\n",
      "I:100 Test-Acc:0.8407 Train-Acc:0.883\n",
      "I:110 Test-Acc:0.845 Train-Acc:0.891\n",
      "I:120 Test-Acc:0.8481 Train-Acc:0.901\n",
      "I:130 Test-Acc:0.8505 Train-Acc:0.901\n",
      "I:140 Test-Acc:0.8526 Train-Acc:0.905\n",
      "I:150 Test-Acc:0.8555 Train-Acc:0.914\n",
      "I:160 Test-Acc:0.8577 Train-Acc:0.925\n",
      "I:170 Test-Acc:0.8596 Train-Acc:0.918\n",
      "I:180 Test-Acc:0.8619 Train-Acc:0.933\n",
      "I:190 Test-Acc:0.863 Train-Acc:0.933\n",
      "I:200 Test-Acc:0.8642 Train-Acc:0.926\n",
      "I:210 Test-Acc:0.8653 Train-Acc:0.931\n",
      "I:220 Test-Acc:0.8668 Train-Acc:0.93\n",
      "I:230 Test-Acc:0.8672 Train-Acc:0.937\n",
      "I:240 Test-Acc:0.8681 Train-Acc:0.938\n",
      "I:250 Test-Acc:0.8687 Train-Acc:0.937\n",
      "I:260 Test-Acc:0.8684 Train-Acc:0.945\n",
      "I:270 Test-Acc:0.8703 Train-Acc:0.951\n",
      "I:280 Test-Acc:0.8699 Train-Acc:0.949\n",
      "I:290 Test-Acc:0.8701 Train-Acc:0.94"
     ]
    }
   ],
   "source": [
    "import numpy as np, sys\n",
    "np.random.seed(1)\n",
    "\n",
    "from keras.datasets import mnist\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "\n",
    "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n",
    "\n",
    "one_hot_labels = np.zeros((len(labels),10))\n",
    "for i,l in enumerate(labels):\n",
    "    one_hot_labels[i][l] = 1\n",
    "labels = one_hot_labels\n",
    "\n",
    "test_images = x_test.reshape(len(x_test),28*28) / 255\n",
    "test_labels = np.zeros((len(y_test),10))\n",
    "for i,l in enumerate(y_test):\n",
    "    test_labels[i][l] = 1\n",
    "\n",
    "def tanh(x):\n",
    "    return np.tanh(x)\n",
    "\n",
    "def tanh2deriv(output):\n",
    "    return 1 - (output ** 2)\n",
    "\n",
    "def softmax(x):\n",
    "    temp = np.exp(x)\n",
    "    return temp / np.sum(temp, axis=1, keepdims=True)\n",
    "\n",
    "alpha, iterations, hidden_size = (2, 300, 100)\n",
    "pixels_per_image, num_labels = (784, 10)\n",
    "batch_size = 100\n",
    "\n",
    "weights_0_1 = 0.02*np.random.random((pixels_per_image,hidden_size))-0.01\n",
    "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n",
    "\n",
    "for j in range(iterations):\n",
    "    correct_cnt = 0\n",
    "    for i in range(int(len(images) / batch_size)):\n",
    "        batch_start, batch_end=((i * batch_size),((i+1)*batch_size))\n",
    "        layer_0 = images[batch_start:batch_end]\n",
    "        layer_1 = tanh(np.dot(layer_0,weights_0_1))\n",
    "        dropout_mask = np.random.randint(2,size=layer_1.shape)\n",
    "        layer_1 *= dropout_mask * 2\n",
    "        layer_2 = softmax(np.dot(layer_1,weights_1_2))\n",
    "\n",
    "        for k in range(batch_size):\n",
    "            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))\n",
    "\n",
    "        layer_2_delta = (labels[batch_start:batch_end]-layer_2) / (batch_size * layer_2.shape[0])\n",
    "        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)\n",
    "        layer_1_delta *= dropout_mask\n",
    "\n",
    "        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n",
    "\n",
    "    test_correct_cnt = 0\n",
    "\n",
    "    for i in range(len(test_images)):\n",
    "\n",
    "        layer_0 = test_images[i:i+1]\n",
    "        layer_1 = tanh(np.dot(layer_0,weights_0_1))\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n",
    "    if(j % 10 == 0):\n",
    "        sys.stdout.write(\"\\n\"+ \\\n",
    "         \"I:\" + str(j) + \\\n",
    "         \" Test-Acc:\"+str(test_correct_cnt/float(len(test_images)))+\\\n",
    "         \" Train-Acc:\" + str(correct_cnt/float(len(images))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
